# create violin + UpSet plots for AST measurements and fluoroquinolone resistance mechanisms
upset_violin_plot <- function(df, x, y, z) {
# df - dataframe; x - Flq resistance mechanism column; y - Laboratory typing method column
# z - Testing.standard
require(dplyr, ggplot2, ggupset)
# checks if lab typing method is MIC or disk diffusion
if (grepl('MIC.*$', df[y])) {
# checks if testing standard is EUCAST or CLSI
if (grepl('EUCAST', df[z])) {
df %>%
group_by(strain, Measurement, Resistance.phenotype) %>%
# !! sym() allows one to pass a column name as an argument
summarize(res_mech_list = list(!! sym(x))) %>%
ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count(aes(colour = Resistance.phenotype)) +
# EUCAST cut-off of 1.0 mg/L is resistant
geom_hline(aes(yintercept = 1.0, linetype = "resistant"), alpha = 0.6, color = "black") +
# EUCAST cut-off of 0.25 mg/L is susceptible
geom_hline(aes(yintercept = 0.25, linetype = "susceptible"), alpha = 0.6, color = "black") +
labs(x = "Fluoroquinolone resistance determinants", y = "Ciprofloxacin MIC measurement (mg/L)",
size = "Number of strains", linetype = "MIC breakpoints", colour = "Phenotype") +
# convert y-axis to log2 scale and labels to 3 dp
scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3)) +
scale_linetype_manual(values = c("solid", "dotted")) +
scale_x_upset() +
theme_combmatrix(combmatrix.panel.line.size = 0.8)
} else {
df %>%
group_by(strain, Measurement, Resistance.phenotype) %>%
# !! sym() allows one to pass a column name as an argument
summarize(res_mech_list = list(!! sym(x))) %>%
ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count(aes(colour = Resistance.phenotype)) +
# CLSI cut-off of 1.0 mg/L is resistant
geom_hline(aes(yintercept = 1.0, linetype = "resistant"), alpha = 0.6, color = "black") +
# CLSI cut-off of 0.25 mg/L is susceptible
geom_hline(aes(yintercept = 0.25, linetype = "susceptible"), alpha = 0.6, color = "black") +
labs(x = "Fluoroquinolone resistance determinants", y = "Ciprofloxacin MIC measurement (mg/L)",
size = "Number of strains", linetype = "MIC breakpoints", colour = "Phenotype") +
# convert y-axis to log2 scale and labels to 3 dp
scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3)) +
scale_linetype_manual(values = c("solid", "dotted")) +
scale_x_upset() +
theme_combmatrix(combmatrix.panel.line.size = 0.8)
}
} else {
if (grepl('EUCAST', df[z])) {
df %>%
group_by(strain, Measurement, Resistance.phenotype) %>%
summarize(res_mech_list = list(!! sym(x))) %>%
ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count(aes(colour = Resistance.phenotype)) +
# EUCAST cut-off of 21mm is resistant
geom_hline(aes(yintercept = 21, linetype = "resistant"), alpha = 0.6, color = "black") +
# EUCAST cut-off of 25mm is susceptible
geom_hline(aes(yintercept = 25, linetype = "susceptible"), color = "black") +
labs(title = "Fluoroquinolone resistance determinant combinations\nat various disk diffusion measurements", x =
"Fluoroquinolone resistance determinants", y = "Ciprofloxacin disk diffusion measurement (mm)",
size = "Number of strains", linetype = "Disk diffusion breakpoints", colour = "Phenotype") +
scale_linetype_manual(values = c("solid", "dotted")) +
scale_x_upset() +
theme_combmatrix(combmatrix.panel.line.size = 0.8)
} else {
df %>%
group_by(strain, Measurement, Resistance.phenotype) %>%
summarize(res_mech_list = list(!! sym(x))) %>%
ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count(aes(colour = Resistance.phenotype)) +
# CLSI cut-off of 21mm is resistant
geom_hline(aes(yintercept = 21, linetype = "resistant"), alpha = 0.6, color = "black") +
# CLSI cut-off of 26mm is susceptible
geom_hline(aes(yintercept = 26, linetype = "susceptible"), color = "black") +
labs(title = "Fluoroquinolone resistance determinant combinations\nat various disk diffusion measurements", x =
"Fluoroquinolone resistance determinants", y = "Ciprofloxacin disk diffusion measurement (mm)",
size = "Number of strains", linetype = "Disk diffusion breakpoints", colour = "Phenotype") +
scale_linetype_manual(values = c("solid", "dotted")) +
scale_x_upset() +
theme_combmatrix(combmatrix.panel.line.size = 0.8)
}
}
}# create percent stacked barchart + UpSet plots for AST measurements and fluoroquinolone resistance mechanisms
upset_bar_plot <- function(df, x) {
# df - dataframe; x - Flq resistance mechanism column
require(dplyr, ggplot2, ggupset)
df %>%
group_by(strain, Measurement) %>%
# !! sym() allows one to pass a column name as an argument
summarize(res_mech_list = list(!! sym(x)), Resistance.phenotype) %>%
ggplot(aes(res_mech_list, Measurement, fill = Resistance.phenotype)) + geom_bar(position="fill", stat="identity") +
labs(x = "Fluoroquinolone resistance determinants", y = "% phenotype",
fill = "Phenotype") +
scale_x_upset() +
theme_combmatrix(combmatrix.panel.line.size = 0.8)
}# function to group some resistance determinants
group_res_mech <- function(df, x) {
# df - data frame; x - column(s) with resistance determinanats
# {{ }} and := allows one to pass a column name as a function parameter
require(dplyr)
rare_res_qnr_genes <- c("qnrB7", "qnrE2", "qnrS2", "qnrVC1", "qnrVC6")
df %>%
# based on statistical analysis (chunk proportion), combine QRDR mutations to have just 4 types (GyrA-83, GyrA-87, ParC-80, ParC-84)
# except GyrA-87H which is found in one strain and is sensitive
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "GyrA-83"), "GyrA-83")) %>%
mutate({{ x }} := case_when(startsWith({{ x }}, "GyrA-87") & {{ x }} != "GyrA-87H" ~ "GyrA-87",
TRUE ~ {{ x }})) %>%
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "ParC-80"), "ParC-80")) %>%
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "ParC-84"), "ParC-84")) %>%
# also combine the following acquired genes
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qepA2"), "qepA2")) %>%
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrA1"), "qnrA1")) %>%
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrB1"), "qnrB1")) %>%
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrB19"), "qnrB19")) %>%
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrB2"), "qnrB2")) %>%
mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrS1"), "qnrS1")) %>%
mutate({{ x }} := replace({{ x }}, grepl(paste(rare_res_qnr_genes, collapse = "|"), {{ x }}), "rare_res_qnr"))
}library(here)
library(tidyverse)
library(scales)
library(ggupset)
library(RColorBrewer)
library(collapse)
library(DT)
library(naniar)# read antibiogram data
antibiogram <- read.delim(file = "antibiogram_combined.tsv",
header = TRUE, sep = "\t")
# filter for ciprofloxacin
# edit: remove Omp mutations from analysis as based on violin + upset plots, they
# do not seem to contribute to ciprofloxacin resistance
cipro_antibiogram <- antibiogram %>%
filter(Antibiotic == "ciprofloxacin") %>%
select(strain:ST, Flq_mutations, Flq_acquired, Antibiotic:Measurement.Round) %>%
mutate(Resistance.phenotype = case_when(grepl('MIC.*$', Laboratory.Typing.Method) & Measurement <= 0.25 ~
"susceptible",
grepl('MIC.*$', Laboratory.Typing.Method) & Testing.standard ==
"EUCAST" & Measurement > 0.5 ~ "resistant",
grepl('MIC.*$', Laboratory.Typing.Method) & Testing.standard ==
"CLSI" & Measurement >= 1 ~ "resistant",
Laboratory.Typing.Method == "Disk diffusion" & Testing.standard ==
"EUCAST" & Measurement >= 25 ~ "susceptible",
Laboratory.Typing.Method == "Disk diffusion" & Testing.standard ==
"EUCAST" & Measurement < 22 ~ "resistant",
Laboratory.Typing.Method == "Disk diffusion" & Testing.standard ==
"CLSI" & Measurement >= 26 ~ "susceptible",
Laboratory.Typing.Method == "Disk diffusion" & Testing.standard ==
"CLSI" & Measurement <= 21 ~ "resistant",
TRUE ~ "intermediate")) %>%
# filter out strains with measurement of 0.12 and 0.75 (no such MIC measurements)
filter(Measurement != 0.12 & Measurement != 0.75)
#write_delim(cipro_antibiogram, file = "cipro_antibiogram.tsv", delim = "\t")
# combine antibiogram with mutations not in kleborate data file
no_mut_kleborate <- read.delim(file = "mutations_not_in_kleborate.tsv",
header = TRUE, sep = "\t")
# combine antibiogram with mutations not in kleborate data file
cipro_antibiogram <- cipro_antibiogram %>%
left_join(no_mut_kleborate, by = c("strain")) %>%
mutate(Flq_mutations = case_when(Flq_mutations.y %in% NA ~ Flq_mutations.x,
Flq_mutations.x == "-" & Flq_mutations.y %in% NA ~ Flq_mutations.x,
Flq_mutations.x == "-" ~ Flq_mutations.y,
Flq_mutations.x != "-" & !is.na(Flq_mutations.y) & !Flq_mutations.x %in% Flq_mutations.y ~
paste(Flq_mutations.x, Flq_mutations.y, sep = ";"),
TRUE ~ Flq_mutations.x)) %>%
select(- c(Flq_mutations.x, Flq_mutations.y)) %>%
relocate(Flq_mutations, .after = ST)
# separate mutations and check for duplicates then combine them again into the Flq_mutations column
cipro_antibiogram <- cipro_antibiogram %>%
pivot_longer(Flq_mutations, names_to = "Flq.resistance.mech",
values_to = "Resistance.mech.type") %>%
separate(Resistance.mech.type, into = c("type_A", "type_B", "type_C", "type_D", "type_E", "type_F", "type_G", "type_H", "type_I"),
sep = "[;]")
cipro_antibiogram <- cipro_antibiogram %>%
pivot_longer(cols = names(cipro_antibiogram)[27:35], values_to = "Resistance.mech.type") %>%
drop_na(Resistance.mech.type) %>%
select(-name) %>%
filter(!duplicated(cbind(strain, Resistance.mech.type))) %>%
pivot_wider(names_from = Flq.resistance.mech, values_from = Resistance.mech.type,
values_fn = function(x) paste(x, collapse=";")) %>%
relocate(Flq_mutations, .before = Flq_acquired)cipro_antibiogram %>%
group_by(index) %>%
ggplot(aes(x = index)) +
geom_bar(fill = "#ff8000", alpha = 0.6) +
geom_text(aes(label = ..count..), stat = "count", nudge_y = 60) +
labs(title = "Total sample count per dataset", y = "Total sample count", fill = "Lab typing method") +
theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1))cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
filter(Testing.standard == "CLSI") %>%
ggplot(aes(x = factor(Measurement), fill = index)) + geom_bar() +
# CLSI cut-off of 1.0 mg/L is resistant
geom_vline(aes(xintercept = 5, linetype = "resistant"), alpha = 0.6, color = "black") +
# CLSI cut-off of 0.25 mg/L is susceptible
geom_vline(aes(xintercept = 3, linetype = "susceptible"), color = "black") +
labs(title = "Total sample count at various MIC (broth dilution) measurements", x = "Measurement (mg/L)", y = "Frequency", fill = "Dataset",
linetype = "MIC breakpoints") +
scale_linetype_manual(values = c("solid", "dotted")) cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
filter(Testing.standard == "EUCAST") %>%
ggplot(aes(x = factor(Measurement), fill = index)) + geom_bar() +
# EUCAST cut-off of 1.0 mg/L is resistant
geom_vline(aes(xintercept = 8, linetype = "resistant"), alpha = 0.6, color = "black") +
# EUCAST cut-off of 0.25 mg/L is susceptible
geom_vline(aes(xintercept = 5, linetype = "susceptible"), color = "black") +
labs(title = "Total sample count at various MIC (broth dilution) measurements", x = "Measurement (mg/L)", y = "Frequency", fill = "Dataset",
linetype = "MIC breakpoints") +
scale_linetype_manual(values = c("solid", "dotted")) # only EUCAST used as testing standard
cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
ggplot(aes(x = factor(Measurement), fill = index)) + geom_bar() +
# EUCAST cut-off of 1.0 mg/L is resistant
geom_vline(aes(xintercept = 6, linetype = "resistant"), alpha = 0.6, color = "black") +
# EUCAST cut-off of 0.25 mg/L is susceptible
geom_vline(aes(xintercept = 4, linetype = "susceptible"), color = "black") +
labs(title = "Total sample count at various MIC (agar dilution) measurements", x = "Measurement (mg/L)", y = "Frequency", fill = "Dataset",
linetype = "MIC breakpoints") +
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
scale_linetype_manual(values = c("solid", "dotted")) cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
filter(Testing.standard == "CLSI") %>%
ggplot(aes(Measurement, fill = index)) + geom_bar() +
# CLSI cut-off of 21mm is resistant
geom_vline(aes(xintercept = 21, linetype = "resistant"), color = "black") +
# CLSI cut-off of 26mm is susceptible
geom_vline(aes(xintercept = 26, linetype = "susceptible"), color = "black") +
labs(title = "Total sample count at various disk diffusion measurements", x = "Measurement (mm)", y = "Frequency", fill = "Dataset",
linetype = "Disk diffusion \nbreakpoints") +
scale_linetype_manual(values = c("solid", "dotted"))cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
filter(Testing.standard == "EUCAST") %>%
ggplot(aes(Measurement, fill = index)) + geom_bar() +
# EUCAST cut-off of 21mm is resistant
geom_vline(aes(xintercept = 21, linetype = "resistant"), color = "black") +
# CLSI cut-off of 25mm is susceptible
geom_vline(aes(xintercept = 25, linetype = "susceptible"), color = "black") +
labs(title = "Total sample count at various disk diffusion measurements", x = "Measurement (mm)", y = "Frequency", fill = "Dataset",
linetype = "Disk diffusion \nbreakpoints") +
scale_linetype_manual(values = c("solid", "dotted"))# combine Flq_mutations and Flq_acquired into one column
# edit: remove Omp mutations from analysis as based on violin + upset plots, they
# do not seem to contribute to ciprofloxacin resistance
cipro_res_mech <- cipro_antibiogram %>%
pivot_longer(Flq_mutations:Flq_acquired, names_to = "Flq.resistance.mech",
values_to = "Resistance.mech.type") %>%
separate(Resistance.mech.type, into = c("type_A", "type_B", "type_C"), sep = "[;]")
cipro_res_mech <- cipro_res_mech %>%
pivot_longer(cols = names(cipro_res_mech)[26:28], values_to = "Resistance.mech.type") %>%
drop_na(Resistance.mech.type) %>%
select(-name)
# group gyrA, parC, and qnr genes
cipro_res_mech <- cipro_res_mech %>%
group_res_mech(Resistance.mech.type)
# explicitly label strains with wildtype QRDR (gyrA/parC) and no acquired genes as such
cipro_res_mech <- cipro_res_mech %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, Flq.resistance.mech=="Flq_mutations" & Resistance.mech.type=="-", "wt_QRDR")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, Flq.resistance.mech=="Flq_acquired" & Resistance.mech.type=="-", "no_acquired_genes"))# count number of occurrences of the different fluoroquinolone resistance determinants
summ_res_mech <- cipro_res_mech %>%
filter(Resistance.mech.type != "wt_QRDR" & Resistance.mech.type != "no_acquired_genes") %>%
group_by(Flq.resistance.mech, Resistance.mech.type) %>%
summarise(count = n())
DT::datatable(summ_res_mech,
width = "50%",
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))summ_res_mech %>%
ggplot(aes(Resistance.mech.type, count, fill = Flq.resistance.mech)) +
geom_bar(stat = "identity") +
geom_text(aes(label = count), stat = "identity", nudge_y = 60) +
labs(title = "Total sample count per resistance determinant", x = "Resistance mechanism",
y = "Total sample count", fill = "Fluoroquinolone resistance \nmechanism type") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(palette = "Accent")cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
filter(Testing.standard == "CLSI") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
labs(title = "Fluoroquinolone resistance determinant combinations at various MIC (broth dilution) measurements")cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
filter(Testing.standard == "EUCAST") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
labs(title = "Fluoroquinolone resistance determinant combinations at various MIC (broth dilution) measurements") # only EUCAST used as testing standard
cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
labs(title = "Fluoroquinolone resistance determinant combinations\nat various MIC (agar dilution) measurements") cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
filter(Testing.standard == "CLSI") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
filter(Testing.standard == "EUCAST") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard")cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
filter(Testing.standard == "CLSI") %>%
upset_bar_plot("Resistance.mech.type") +
labs(title = "Phenotypes for fluoroquinolone resistance determinant combinations")cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
filter(Testing.standard == "EUCAST") %>%
upset_bar_plot("Resistance.mech.type") +
labs(title = "Phenotypes for fluoroquinolone resistance determinant combinations")# only EUCAST used as testing standard
cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
upset_bar_plot("Resistance.mech.type") +
labs(title = "Phenotypes for fluoroquinolone resistance determinant combinations")cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
filter(Testing.standard == "CLSI") %>%
upset_bar_plot("Resistance.mech.type") +
labs(title = "Phenotypes for fluoroquinolone resistance\ndeterminant combinations")cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
filter(Testing.standard == "EUCAST") %>%
upset_bar_plot("Resistance.mech.type") +
labs(title = "Phenotypes for fluoroquinolone resistance\ndeterminant combinations")# count STs with resistant phenotype
top_35_st_res <- cipro_antibiogram %>%
filter(Resistance.phenotype == "resistant") %>%
filter(Flq_mutations != "-" | Flq_acquired != "-") %>%
group_by(ST) %>%
summarise(sample_count = n()) %>%
as.data.frame() %>%
arrange(desc(sample_count)) %>%
head(35) %>%
mutate(ST = factor(ST, levels = ST, ordered = TRUE))
DT::datatable(top_35_st_res,
width = "50%",
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))# Blues palette with 9 colors
colour_palette <- brewer.pal(9, "Blues")
# Add more colors to this palette :
colour_palette <- colorRampPalette(colour_palette)(35)
top_35_st_res %>%
ggplot(aes(y = ST, x = sample_count, fill = ST, label = sample_count)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(hjust = -0.5) +
scale_y_discrete(limits = rev) +
ggtitle("Top STs with fluoroquinolone resistance") +
labs(x = "Sample count") +
# reverse order of colour palette
scale_fill_manual(values = rev(colour_palette)) +
theme_minimal() +
theme(legend.position = "none",
axis.text = element_text(size = 10)) +
coord_cartesian(clip = "off")st_totals <- cipro_antibiogram %>%
group_by(ST) %>%
summarise(N=n())
st_res_mech <- cipro_res_mech %>%
filter(Resistance.phenotype == "resistant") %>%
filter(Resistance.mech.type != "wt_QRDR", Resistance.mech.type != "no_acquired_genes") %>%
count(ST, Resistance.mech.type) %>%
left_join(st_totals) %>% # ST total counts
mutate(prop = n/N) %>% # proportion per ST
mutate(se = sqrt(prop*(1-prop)/N)) %>%
mutate(p_lowerCI = prop-1.96*se, p_upperCI = prop+1.96*se) %>%
mutate(p_lowerCI = replace(p_lowerCI, p_lowerCI<0, 0)) %>%
mutate(p_upperCI = replace(p_upperCI, p_upperCI>1, 1)) %>%
select(-se) %>%
mutate(ST = factor(ST, levels = unique(ST)))
DT::datatable(st_res_mech,
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))st_res_mech %>%
filter(ST %in% st_res_mech$ST[st_res_mech$prop>0.1 & st_res_mech$N>50]) %>%
ggplot(aes(x=ST, y = Resistance.mech.type)) +
geom_tile(aes(fill=prop)) +
labs(subtitle = "Fluoroquinolone resistance mechanisms in top resistant STs",
y = "Fluoroquinolone resistance mechanisms", fill = "Proportion per \nST") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8, colour = "black")) +
scale_fill_gradient(low = "white", high = "red")# separate resistance determinants
cipro_res_mech_prop <- cipro_antibiogram %>%
separate(Flq_mutations, into = c("mut_A", "mut_B", "mut_C"), sep = "[;]") %>%
separate(Flq_acquired, into = c("acq_A", "acq_B", "acq_C"), sep = "[;]")
# group determinants based on statistical analysis
cipro_res_mech_prop <- cipro_res_mech_prop %>%
group_res_mech(mut_A) %>%
group_res_mech(mut_B) %>%
group_res_mech(mut_C) %>%
group_res_mech(acq_A) %>%
group_res_mech(acq_B) %>%
group_res_mech(acq_C)
# combine previously separated resistance determinants columns
cipro_res_mech_prop <- cipro_res_mech_prop %>%
unite("Flq_mutations", mut_A, mut_B, mut_C, sep = ";", na.rm = TRUE) %>%
unite("Flq_acquired", acq_A, acq_B, acq_C, sep = ";", na.rm = TRUE)
# combine all the resistance determinants columns using a ";" excluding rows without
# mutations
cipro_res_mech_prop <- cipro_res_mech_prop %>%
# replace "-" with NA
replace_with_na(replace = list(Flq_mutations = "-", Flq_acquired = "-")) %>%
unite("res_mech", Flq_mutations:Flq_acquired, sep = ";", remove = FALSE, na.rm = TRUE) %>%
# replace NA with "-"
replace_na(list(Flq_mutations = "-", Flq_acquired = "-"))
# replace empty cells with "-"
cipro_res_mech_prop$res_mech <- sub("^$", "-", cipro_res_mech_prop$res_mech)
# calculate proportions of different combinations
st_res_mech_prop <- cipro_res_mech_prop %>%
filter(Resistance.phenotype == "resistant") %>%
filter(res_mech != "-") %>%
group_by(ST) %>%
count(ST, res_mech) %>%
left_join(st_totals) %>% # ST total counts
mutate(prop = n/N) %>% # proportion per ST
mutate(se = sqrt(prop*(1-prop)/N)) %>%
mutate(p_lowerCI = prop-1.96*se, p_upperCI = prop+1.96*se) %>%
mutate(p_lowerCI = replace(p_lowerCI, p_lowerCI<0, 0)) %>%
mutate(p_upperCI = replace(p_upperCI, p_upperCI>1, 1)) %>%
select(-se) %>%
mutate(ST = factor(ST, levels = unique(ST))) %>%
arrange(res_mech)
DT::datatable(st_res_mech_prop,
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))# create heatmap/upset plot of different combinations per ST
st_res_mech_prop %>%
filter(ST %in% st_res_mech$ST[st_res_mech$prop>0.1 & st_res_mech$N>50]) %>%
summarise(ST, res_mech, prop) %>%
ggplot(aes(x=res_mech, y=ST, fill=prop)) +
geom_tile() +
axis_combmatrix(sep = ";") +
theme_combmatrix(combmatrix.panel.line.size = 0.8) +
scale_fill_gradient(low = "white", high = "red") +
labs(x = "Resistance determinants", fill = "Proportion per\nST",
title = "Proportions of fluoroquinolone resistance determinant combinations per ST")no_res_mech <- cipro_antibiogram %>%
filter(Flq_mutations == "-", Flq_acquired == "-") %>%
filter(Resistance.phenotype == "resistant")
st_no_res_mech <- no_res_mech %>%
group_by(ST) %>%
summarise(sample_count = n()) %>%
as.data.frame() %>%
arrange(desc(sample_count)) %>%
mutate(ST = factor(ST, levels = ST, ordered = TRUE))
DT::datatable(st_no_res_mech,
width = "50%",
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))st_no_res_mech %>%
filter(sample_count > 2) %>%
ggplot(aes(ST, sample_count)) +
geom_bar(stat = "identity", fill = "blue", alpha = 0.7) +
labs(title = "Resistant STs with no fluoroquinolone resistance mechanisms", y = "Count") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))sus_res_mech <- cipro_res_mech %>%
filter(Resistance.phenotype == "susceptible") %>%
filter(Resistance.mech.type != "wt_QRDR", Resistance.mech.type != "no_acquired_genes")
st_sus_res_mech <- sus_res_mech %>%
group_by(ST) %>%
summarise(res_mech = Resistance.mech.type) %>%
as.data.frame() %>%
count(ST, res_mech, sort = TRUE) %>%
mutate(ST = factor(ST, levels = unique(ST)))
DT::datatable(st_sus_res_mech,
width = "50%",
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))st_sus_res_mech %>%
filter(n > 1) %>%
ggplot(aes(ST, n, fill = res_mech)) +
geom_bar(stat = "identity") +
labs(title = "Fluoroquinolone resistance mechanisms employed by the top susceptible STs", y = "Count", fill = "Fluoroquinolone resistance\nmechanisms") +
# put legend in one column
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))sus_res_mech %>%
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
filter(Testing.standard == "CLSI") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
labs(title = "Fluoroquinolone resistance determinant combinations\nat various MIC (broth dilution) measurements") sus_res_mech %>%
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
filter(Testing.standard == "EUCAST") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
labs(title = "Fluoroquinolone resistance determinant combinations\nat various MIC (broth dilution) measurements") # only EUCAST used as testing standard
sus_res_mech %>%
filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
labs(title = "Fluoroquinolone resistance determinant combinations\nat various MIC (agar dilution) measurements") # only EUCAST has values
sus_res_mech %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
filter(Testing.standard == "EUCAST") %>%
upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard")# calculate frequencies of different resistance mechanisms for the different phenotypes
# including numbers for the wildtype groups for comparison to compare resistance frequency
# amongst strains with the determinant vs strains without
mech_freq <- cipro_res_mech %>%
group_by(Resistance.mech.type, Resistance.phenotype) %>%
filter(Resistance.phenotype != "intermediate") %>%
summarise(n = n()) %>%
ungroup() %>%
group_by(Resistance.mech.type) %>%
pivot_wider(names_from = Resistance.phenotype, values_from = n)
# convert na values to 0
mech_freq[is.na(mech_freq)] <- 0
# find totals of different resistance mechanisms
mech_freq <- mech_freq %>%
mutate(total = sum(c(resistant, susceptible)))
# compare each gyrA/parC SNP vs wildtype QRDR
qrdr_wt <- mech_freq %>% filter(Resistance.mech.type == "wt_QRDR")
qrdr_mut <- mech_freq %>%
filter(startsWith(Resistance.mech.type, "GyrA") | startsWith(Resistance.mech.type, "ParC")) %>%
mutate(wt_res = qrdr_wt[2], wt_total=qrdr_wt[4]) # add columns with the wildtype counts for res & total
# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
qrdr_prop_out <- dapply(as.matrix(qrdr_mut), MARGIN = 1,
FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))
# combine qrdr_mut and qrdr_prop_out
qrdr_mut_freq_prop <- cbind(qrdr_mut, qrdr_prop_out) %>%
select(resistant, total, estimate1, estimate2, p.value) %>%
mutate(p.value=as.numeric(p.value))
qrdr_mut_freq_prop
# specific mutations observed more than once, significantly associated with elevated resistance (vs wildtype QRDR)
view(qrdr_mut_freq_prop %>% filter(total>1) %>% filter(p.value < 0.05))
# the data provides evidence that ANY mutation at the 4 codons we track is associated with resistance, even though for some individual mutations we only have one observation:
# check gyrA83, there are 5 mutations observed at this codon (A, F, I, L, Y)
# 4 have large numbers (≥10) and significant p-values, gyrA-83A is observed only once but is also resistant
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"GyrA-83"))
# Similar for GyrA-87, except that GyrA87-H is only seen in one strain and it is sensitive, so exclude this one (not a borderline phenotype)
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"GyrA-87"))
# ParC-80 and ParC-84 look the same as for GyrA-83, ie all mutations at the site have same effect can be grouped together
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"ParC-80"))
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"ParC-84"))
# based on the statistical analysis, QRDR mutations will be updated to have just 4 types
# (GyrA-83, GyrA-87, ParC-80, ParC-84) instead of 19 most of which are rare
# compare each Omp mutations vs wildtype Omp
#omp_wt <- mech_freq %>% filter(Resistance.mech.type == "wt_Omp")
#omp_mut <- mech_freq %>%
#filter(startsWith(Resistance.mech.type, "Omp")) %>%
#mutate(wt_res = omp_wt[2], wt_total=omp_wt[4]) # add columns with the wildtype counts for res & total
# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
#omp_prop_out <- dapply(as.matrix(omp_mut), MARGIN = 1,
#FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))
#omp_mut_freq_prop <- cbind(omp_mut, omp_prop_out) %>%
#select(resistant, total, estimate1, estimate2, p.value) %>%
#mutate(p.value=as.numeric(p.value))
# all four mutations (truncation of either ompK35 or ompK36; and insertion of GD or TD in OmpK36, are all significantly associated with resistance)
#view(omp_mut_freq_prop)
# compare acquired vs no acquired genes
no_acquired <- mech_freq %>% filter(Resistance.mech.type == "no_acquired_genes")
acquired_genes <- mech_freq %>%
filter(startsWith(Resistance.mech.type, "qnr") | startsWith(Resistance.mech.type, "qep") ) %>%
mutate(wt_res = no_acquired[2], wt_total=no_acquired[4]) # add columns with the wildtype counts for res & total
# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
acquired_genes_prop_out <- dapply(as.matrix(acquired_genes), MARGIN = 1,
FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))
acquired_genes_freq_prop <- cbind(acquired_genes, acquired_genes_prop_out) %>%
select(resistant, total, estimate1, estimate2, p.value) %>%
mutate(p.value=as.numeric(p.value))
view(acquired_genes_freq_prop)
# from the statistical analysis, the following can be combined:
# qepA2 (n=1, which is resistant) with qepA2* (n=17, all of which are resistant)
# qnrA1 and qnrA1^ (note the ^ means they are identical at protein level anyway) as all are resistant
# qnrB1.v1, qnrB1.v2, qnrB1.v2^ for similar reasons
# qnrB19, qnrB19^
# qnrB2.1, qnrB2.2
# qnrS1, qnrS1?, qnrS1*
# could also combine all the 'rare' genes that are only seen 1-2 times each but are always resistant:
# -> qnrB7, qnrE2, qnrS2, qnrVC1, qnrVC6sessionInfo()## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] naniar_0.6.1 DT_0.26.1 collapse_1.8.9 RColorBrewer_1.1-3
## [5] ggupset_0.3.0 scales_1.2.1 forcats_0.5.2 stringr_1.4.1
## [9] dplyr_1.0.10 purrr_0.3.5 readr_2.1.3 tidyr_1.2.1
## [13] tibble_3.1.8 ggplot2_3.4.0 tidyverse_1.3.2 here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.9 lubridate_1.9.0 assertthat_0.2.1
## [4] rprojroot_2.0.3 digest_0.6.30 utf8_1.2.2
## [7] R6_2.5.1 cellranger_1.1.0 backports_1.4.1
## [10] visdat_0.5.3 reprex_2.0.2 evaluate_0.18
## [13] highr_0.9 httr_1.4.4 pillar_1.8.1
## [16] rlang_1.0.6 googlesheets4_1.0.1 readxl_1.4.1
## [19] rstudioapi_0.14 jquerylib_0.1.4 rmarkdown_2.18
## [22] labeling_0.4.2 googledrive_2.0.0 htmlwidgets_1.5.4
## [25] munsell_0.5.0 broom_1.0.1 compiler_4.2.2
## [28] modelr_0.1.10 xfun_0.35 pkgconfig_2.0.3
## [31] htmltools_0.5.3 tidyselect_1.1.2 fansi_1.0.3
## [34] crayon_1.5.2 tzdb_0.3.0 dbplyr_2.2.1
## [37] withr_2.5.0 grid_4.2.2 jsonlite_1.8.3
## [40] gtable_0.3.0 lifecycle_1.0.3 DBI_1.1.3
## [43] magrittr_2.0.3 cli_3.4.1 stringi_1.7.8
## [46] cachem_1.0.6 farver_2.1.1 fs_1.5.2
## [49] xml2_1.3.3 bslib_0.4.1 ellipsis_0.3.2
## [52] generics_0.1.2 vctrs_0.5.1 tools_4.2.2
## [55] glue_1.6.2 crosstalk_1.2.0 hms_1.1.2
## [58] parallel_4.2.2 fastmap_1.1.0 yaml_2.3.6
## [61] timechange_0.1.1 colorspace_2.0-3 gargle_1.2.1
## [64] rvest_1.0.3 knitr_1.41 haven_2.5.1
## [67] sass_0.4.3